Load Packages:
library("phyloseq")
library("ggplot2")
library("tidyr")
library("RColorBrewer")
library(reshape2)
library(qiime2R)
library(DESeq2)
library("gridExtra")
library(vegan)
library("metagMisc")
library("grid")
library(jcolors)
library("dplyr")
library("breakaway")
library("CoDaSeq")
library("ggbiplot")
library("intrval")
library("tidyverse")
library("ggpubr")
set.seed(1)
Demultiplexed paired-end sequences provided by the OIST sequencing center were imported to QIIME 2 (v2018.11) software (Bolyen et al. 2019). The Divisive Amplicon Denoising Algorithm (DADA) was implemented with the DADA2 plug-in for QIIME 2 for quality filtering, chimera removal, and feature table construction (Callahan et al. 2015). Denoising was implemented for each sequencing run (one for October and one for June), as is suggested by the developers, and feature tables were then merged. Taxonomy was assigned to Amplicon Sequence Variants (ASVs) in the merged feature table by training a classifier on the SILVA v132 97% consensus 16S taxonomy (Quast et al. 2013). The feature table and assigned taxonomy were then exported from QIIME 2 for further analysis.
below is a sample workflow for one sequencing run:
Import the data:
qiime tools import \
--type 'SampleData[PairedEndSequencesWithQuality]' \
--input-path ./16seqs \
--source-format CasavaOneEightSingleLanePerSampleDirFmt \
--output-path 16demux-paired-end.qza
Run DADA2:
qiime dada2 denoise-paired \
--i-demultiplexed-seqs 16demux-paired-end.qza \
--output-dir ./16dada2 \
--o-representative-sequences 16rep-seqs \
--p-trim-left-f 15 \
--p-trim-left-r 15 \
--p-trunc-len-f 295 \
--p-trunc-len-r 280 \
--p-n-threads 3
Merge feature tables from multiple sequencing runs:
#Feature-Table
qiime feature-table merge --i-tables RSJ1_2_mergedtable.qza --i-tables 16dada2/16table.qza --o-merged-table JuneOctMergedTable.qza
#Representative Sequences
qiime feature-table merge-seqs --i-data 16rep-seqs.qza --i-data RSJ1_2_merged-rep-seqs.qza --o-merged-data JuneOctMergedRepSeqs.qza
Train classifier with SILVA v132 97% consensus 16S taxonomy:
qiime tools import \
--type 'FeatureData[Sequence]' \
--input-path $SILVA97otus \
--output-path 97_otus16.qza
qiime tools import \
--type 'FeatureData[Taxonomy]' \
--source-format HeaderlessTSVTaxonomyFormat \
--input-path $Tax97 \
--output-path 97ref-taxonomy16.qza
qiime feature-classifier fit-classifier-naive-bayes \
--i-reference-reads 97_otus16.qza\
--i-reference-taxonomy 97ref-taxonomy16.qza\
--o-classifier 97classifier16.qza
Classify the merged Representative Sequences:
qiime feature-classifier classify-sklearn --i-classifier 97classifier16.qza --i-reads JuneOctMergedRepSeqs.qza --o-classification JuneOctMergedTaxonomy.qza
Export the taxonomic assignments:
qiime metadata tabulate --m-input-file JuneOctMergedTaxonomy.qza --o-visualization JuneOctMergedTaxonomy.qzv
Load 16S ASV table:
phyloseq<-qza_to_phyloseq(features="JuneOctMergedTable.qza")
## Warning in read_qza(features): Artifact was not generated with Qiime2 2018.4,
## if data is not successfully imported, please report here github.com/jbisanz/
## qiime2R/issues
Load metadata:
metatable <- read.csv("JuneOctSampleMap.csv", header = TRUE)
row.names(metatable) <- metatable[["SampleID"]]
detach("package:dplyr", unload=TRUE)
library("dplyr")
metatable <- metatable %>% select(SampleID, everything())
#convert to phyloseq object
META<- sample_data(metatable)
Load taxonomy:
taxonomy <- read.csv("JuneOctTaxonomy.csv", stringsAsFactors = FALSE)
names(taxonomy) <- c("row", "tax", "Confidence")
row.names(taxonomy) <-taxonomy[[1]]
taxonomy <- taxonomy[,(-1)]
#SILVA taxonomy is in one column, separate to be able to work with different taxonomic levels:
taxonomy <- separate(taxonomy, tax, c("D0","D1", "D2", "D3", "D4", "D5", "D6", "D7", "D8", "D9", "D10", "D11", "D12", "D13", "D14"), sep = ";", fill = "right")
#Keep the first 8 taxonomic levels (no assignments afetr that)
taxonomy <- taxonomy[,c(1:8)]
taxmat <- as.matrix(taxonomy)
#covert taxonomy table to phyloseq object
TAX = tax_table(taxmat)
create phyloseq object including feature table, taxonomy, and metadata:
ps = merge_phyloseq(phyloseq, TAX, META)
Prevalence is the percent of samples that an ASV is present in.
prevdf = apply(X = otu_table(ps),
MARGIN = ifelse(taxa_are_rows(ps), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
prevdf = data.frame(Prevalence = prevdf,
TotalAbundance = taxa_sums(ps),
tax_table(ps))
Plot prevalence against total abundance. Low prevalence and low abundance ASVs are potential sequencing artifacts.
prevplot1<-ggplot(prevdf, aes(TotalAbundance, Prevalence / nsamples(ps),color=D1)) +
geom_hline(yintercept = 0.05, alpha = 0.5, linetype = 2) + geom_point(size = 2, alpha = 0.7) +
theme_bw()+
scale_x_log10() + xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
facet_wrap(~D1) + theme(legend.position="none")
prevplot1
There are a few ASV’s that could be filtered out based on prevalence plots, specifically those in groups Diapherotrites, FCPU426, LCP-89, Thermotogae, and WS4. We will leave them, however, because our data set is fairly diverse (soil and water) and these could be taxa found only in one type of sample (i.e. soil).
extract ASV table from phyloseq object:
OTUs <- data.frame(otu_table(ps))
Total number of ASVs in the data set:
OTUsRS<- OTUs
OTUsRS$RowSum <- rowSums(OTUsRS)
OTUsRSnoZero <- OTUsRS$RowSum!=0
sum(OTUsRSnoZero)
## [1] 36007
Total number of ASVs per sample (range and mean):
OTUs0 <- OTUs!=0 #is this number not a zero? true (1) or false (0)
csums <- colSums(OTUs0) # col sums = observed ASV richness
csumdf <- as.data.frame(csums)
max(csumdf$csums) #2886
## [1] 2886
min(csumdf$csums) #64
## [1] 64
mean(csumdf$csums) #643
## [1] 642.489
Import denoising stats for both sequencing runs and merge them into one table:
RSJ1denoise <- read.table("RSJ1_2mergedDenoisingStats.tsv", header = TRUE)
Junedenoise <- read.table("16S_denoisingData.tsv", header = TRUE)
denoise<- rbind(RSJ1denoise,Junedenoise)
#total seqs before denoise
sum(denoise$input) #18436424
## [1] 18436424
min(denoise$input) #76217
## [1] 76217
max(denoise$input) #219584
## [1] 219584
mean(denoise$input) #137585.3
## [1] 137585.3
#totals after denoising
sum(denoise$non.chimeric) #11796526
## [1] 11796526
min(denoise$non.chimeric) #11061
## [1] 11061
max(denoise$non.chimeric) #153646
## [1] 153646
mean(denoise$non.chimeric) #88033.78
## [1] 88033.78
Remove taxa that are not abundant enough to be visible in relative abundance plot:
highPrev<- c("D_1__Acidobacteria", "D_1__Actinobacteria", "D_1__Bacteroidetes", "D_1__Chloroflexi", "D_1__Cyanobacteria", "D_1__Dadabacteria", "D_1__Epsilonbacteraeota", "D_1__Euryarchaeota", "D_1__Firmicutes", "D_1__Fusobacteria", "D_1__Marinimicrobia (SAR406 clade)", "D_1__Planctomycetes", "D_1__Proteobacteria", "D_1__Rokubacteria", "D_1__Verrucomicrobia", "D_1__Gemmatimonadetes")
psNHighPrev<- subset_taxa(ps, D1 %in% highPrev)
convert counts to relative abundance:
physeqPra<- transform_sample_counts(psNHighPrev, function(OTU) 100* OTU/sum(OTU))
glom at D1 taxonomic level (makes a cleaner plot):
glomD1<- tax_glom(physeqPra, "D1")
subset samples to include only field water samples and red soil samples:
psFieldRS<- subset_samples(glomD1, Treat == "A" | Treat == "RS")
make reative abundance plot at Phylum level:
newcolors= c("#332288", "#88CCEE", "#44AA99", "#FFCC00", "#CC3311", "#CC6677", "#FFCCCC", "#999933", "#DDCC77",
"#EE3377", "#882255", "#AA4499", "#BBCCEE", "#222255", "#CCEEFF", "#DDAA33")
metaFieldRS <- metatable[metatable$Treat =="A" | metatable$Treat == "RS", ]
metaFieldRS$Treatment <- factor(metaFieldRS$Treatment, levels=c("A1 6/13", "A2 6/13", "A3 6/13", "A4 6/13", "A1 6/16", "A2 6/16", "A3 6/16", "A4 6/16", "A1 6/19", "A2 6/19", "A3 6/19", "A4 6/19", "RS1", "RS2", "RS3", "RS4", "A1 9/28", "A2 9/28", "A3 9/28", "A4 9/28", "A1 10/01", "A2 10/01", "A3 10/01", "A4 10/01", "A1 10/03", "A2 10/03", "A3 10/03", "A4 10/03", "A1 10/08", "A2 10/08", "A3 10/08", "A4 10/08", "RS 2", "RS 3" ))
METArs<- sample_data(metaFieldRS)
sample_data(psFieldRS) <- METArs
taxabarplotD1<-plot_bar(psFieldRS, x= "Treatment", fill = "D1", facet_grid= ~Month) + scale_y_continuous(expand = c(0, 0)) + ggtitle("") + scale_fill_manual(values=newcolors ) + theme(legend.title=element_blank()) + geom_bar(aes(fill=D1), stat="identity", position="stack", width =0.9) +theme_classic() + theme(text = element_text(size=14))+theme(axis.text.x = element_text(angle = 90)) + xlab("Sample") +ylab("Relative Abundance(%)") + facet_grid(~Month,scales="free") + theme(text = element_text(size=14))
taxabarplotD1+ theme(legend.position="none")
#ggsave("JuneOctField_RelAbund2.pdf", width = 8, height = 5)
Legend:
sample_data(physeqPra) <- METArs
sar11<- subset_taxa(physeqPra, D3 == "D_3__SAR11 clade")
sar11_field<- subset_samples(sar11, Treat == "A" | Treat == "RS")
sar11D3<- tax_glom(sar11_field, "D3")
taxabarplotD1<-plot_bar(sar11D3, x= "Treatment", fill = "D3", facet_grid= ~Month) + scale_y_continuous(expand = c(0, 0)) + ggtitle("") + scale_fill_manual(values=newcolors ) + theme(legend.title=element_blank()) + geom_bar(aes(fill=D3), stat="identity", position="stack", width =0.9) +theme_classic() + theme(text = element_text(size=14))+theme(axis.text.x = element_text(angle = 90)) + xlab("Sample") +ylab("Relative Abundance(%)") + facet_grid(~Month,scales="free") + theme(text = element_text(size=14))
taxabarplotD1+ theme(legend.position="none")
syn<- subset_taxa(physeqPra, D3 == "D_3__Synechococcales")
syn_field<- subset_samples(syn, Treat == "A" | Treat == "RS")
synD3<- tax_glom(syn_field, "D3")
taxabarplotD1<-plot_bar(synD3, x= "Treatment", fill = "D3", facet_grid= ~Month) + scale_y_continuous(expand = c(0, 0)) + ggtitle("") + scale_fill_manual(values=newcolors[3] ) + theme(legend.title=element_blank()) + geom_bar(aes(fill=D3), stat="identity", position="stack", width =0.9) +theme_classic() + theme(text = element_text(size=14))+theme(axis.text.x = element_text(angle = 90)) + xlab("Sample") +ylab("Relative Abundance(%)") + facet_grid(~Month,scales="free") + theme(text = element_text(size=14))
taxabarplotD1+ theme(legend.position="none")
glomD2<- tax_glom(physeqPra, "D2")
psFieldRS<- subset_samples(glomD2, Treat == "A" | Treat == "RS")
#multiply color palette by 100 to have enough colors for the plot
newcolors= rep(c("#332288", "#88CCEE", "#44AA99", "#FFCC00", "#CC3311", "#CC6677", "#FFCCCC", "#999933", "#DDCC77",
"#EE3377", "#882255", "#AA4499", "#BBCCEE", "#222255", "#CCEEFF", "#DDAA33"), 100)
metaFieldRS <- metatable[metatable$Treat =="A" | metatable$Treat == "RS", ]
metaFieldRS$Treatment <- factor(metaFieldRS$Treatment, levels=c("A1 6/13", "A2 6/13", "A3 6/13", "A4 6/13", "A1 6/16", "A2 6/16", "A3 6/16", "A4 6/16", "A1 6/19", "A2 6/19", "A3 6/19", "A4 6/19", "RS1", "RS2", "RS3", "RS4", "A1 9/28", "A2 9/28", "A3 9/28", "A4 9/28", "A1 10/01", "A2 10/01", "A3 10/01", "A4 10/01", "A1 10/03", "A2 10/03", "A3 10/03", "A4 10/03", "A1 10/08", "A2 10/08", "A3 10/08", "A4 10/08", "RS 2", "RS 3" ))
METArs<- sample_data(metaFieldRS)
sample_data(psFieldRS) <- METArs
taxabarplotD2<-plot_bar(psFieldRS, x= "Treatment", fill = "D2", facet_grid= ~Month) + scale_y_continuous(expand = c(0, 0)) + ggtitle("") + scale_fill_manual(values=newcolors ) + theme(legend.title=element_blank()) + geom_bar(aes(fill=D2), stat="identity", position="stack", width =0.9) +theme_classic() + theme(text = element_text(size=14))+theme(axis.text.x = element_text(angle = 90)) + xlab("Sample") +ylab("Relative Abundance(%)") + facet_grid(~Month,scales="free") + theme(text = element_text(size=14))
taxabarplotD2+ theme(legend.position="none")
Legend:
glomD3<- tax_glom(physeqPra, "D3")
psFieldRS<- subset_samples(glomD3, Treat == "A" | Treat == "RS")
#multiply color palette by 100 to have enough colors for the plot
newcolors= rep(c("#332288", "#88CCEE", "#44AA99", "#FFCC00", "#CC3311", "#CC6677", "#FFCCCC", "#999933", "#DDCC77",
"#EE3377", "#882255", "#AA4499", "#BBCCEE", "#222255", "#CCEEFF", "#DDAA33"), 100)
metaFieldRS <- metatable[metatable$Treat =="A" | metatable$Treat == "RS", ]
metaFieldRS$Treatment <- factor(metaFieldRS$Treatment, levels=c("A1 6/13", "A2 6/13", "A3 6/13", "A4 6/13", "A1 6/16", "A2 6/16", "A3 6/16", "A4 6/16", "A1 6/19", "A2 6/19", "A3 6/19", "A4 6/19", "RS1", "RS2", "RS3", "RS4", "A1 9/28", "A2 9/28", "A3 9/28", "A4 9/28", "A1 10/01", "A2 10/01", "A3 10/01", "A4 10/01", "A1 10/03", "A2 10/03", "A3 10/03", "A4 10/03", "A1 10/08", "A2 10/08", "A3 10/08", "A4 10/08", "RS 2", "RS 3" ))
METArs<- sample_data(metaFieldRS)
sample_data(psFieldRS) <- METArs
taxabarplotD3<-plot_bar(psFieldRS, x= "Treatment", fill = "D3", facet_grid= ~Month) + scale_y_continuous(expand = c(0, 0)) + ggtitle("") + scale_fill_manual(values=newcolors, guide = guide_legend(ncol= 5)) + theme(legend.title=element_blank()) + geom_bar(aes(fill=D3), stat="identity", position="stack", width =0.9) +theme_classic() + theme(text = element_text(size=14))+theme(axis.text.x = element_text(angle = 90)) + xlab("Sample") +ylab("Relative Abundance(%)") + facet_grid(~Month,scales="free") + theme(text = element_text(size=14))
taxabarplotD3+ theme(legend.position="none")
Legend:
Get ps object with just mesocosm samples:
psMesoRS<- subset_samples(glomD1, Treat != "A" & Time != "t6" )
Make new metadata column for merging replicates:
metatable900 <- metatable
metatable900$TreatTime <- paste(metatable$Month, metatable900$Treat, metatable900$Time)
Collapse replicates
ps900 <- subset_samples(psNHighPrev, Treat != "A" & Time != "t6" & Treat != "RS")
sample_data(ps900) <- metatable900
mergedps <- merge_samples(ps900, "TreatTime")
## Warning in asMethod(object): NAs introduced by coercion
Fix metadata sample names after replicate merging:
meta2<-as.data.frame(sample_data(mergedps))
split<- do.call(rbind, strsplit(row.names(meta2), " "))
meta2$Treat <- split[,2]
meta2$Time<-split[,3]
meta2$Month<-split[,1]
meta2$SampleName <- paste(split[,2], split[,3])
meta2$desc <- row.names(meta2)
META <-sample_data(meta2)
sample_data(mergedps)<-META
Relative abundance transformation:
physeqPra<- transform_sample_counts(mergedps, function(OTU) 100* OTU/sum(OTU))
glomD1<- tax_glom(physeqPra, "D1")
Relative abundance plot:
glomD1 <- subset_samples(glomD1, Treat != "F")
taxabarplotD1<-plot_bar(glomD1, x= "SampleName", fill = "D1", facet_grid= ~Month) + scale_y_continuous(expand = c(0, 0)) + ggtitle("") + scale_fill_manual(values=newcolors ) + theme(legend.title=element_blank()) + geom_bar(aes(fill=D1), stat="identity", position="stack", width =0.9) +theme_classic() + theme(text = element_text(size=14))+theme(axis.text.x = element_text(angle = 90)) + xlab("Sample") +ylab("Relative Abundance(%)") + facet_grid(~Month,scales="free") + theme(text = element_text(size=14))
taxabarplotD1+ theme(legend.position="none") + xlab("")
#ggsave("JuneOctMESOs_RelAbundD1.pdf", width = 8, height = 5)
Estimate and plot ASV richness (with Breakaway):
psba<- subset_samples(ps, Type == "Field" | Type == "Soil")
ba <- breakaway(psba)
badf<- summary(ba) %>%
add_column("SampleID" = psba %>% otu_table %>% sample_names)
badf<- merge(badf, metatable, by = "SampleID")
badf$Time_f <- factor(badf$Time, levels=c("1", "13-Jun", "16-Jun", "19-Jun", "2", "28-Sep", "1-Oct","3-Oct", "8-Oct"))
baPlot <- ggplot(badf, aes(x=Time_f, y=estimate, fill = Month)) + facet_grid(. ~ Month, scales="free" )+ geom_boxplot() + theme_bw() + theme(text = element_text(size=14)) +ylab("Richness Estimate") +xlab("") + scale_fill_manual(values=c("#004488", "#DDAA33"))+ theme(text = element_text(size=18)) + theme(legend.position="none")
baPlot
Significance testing with DivNet betta function (June):
psjune <- subset_samples(ps, Month == "June")
psjune <- subset_samples(psjune, Type == "Field" | Type == "Soil")
jba <- breakaway(psjune)
jbt <- betta(summary(jba)$estimate,
summary(jba)$error,
make_design_matrix(psjune, "Time"))
jbt$table
## Estimates Standard Errors p-values
## (Intercept) 2739.3663 258.0268 0.000
## predictors13-Jun -2214.1876 516.0443 0.000
## predictors16-Jun -228.6101 516.0462 0.658
## predictors19-Jun -1924.1529 516.0477 0.000
Significance testing with DivNet betta function (Oct):
psoct <- subset_samples(ps, Month == "Oct")
psoct <- subset_samples(psoct, Type == "Field" | Type == "Soil")
oba <- breakaway(psoct)
obt <- betta(summary(oba)$estimate,
summary(oba)$error,
make_design_matrix(psoct, "Time"))
obt$table
## Estimates Standard Errors p-values
## (Intercept) 714.00787 89.51597 0.000
## predictors2 881.16285 268.54769 0.001
## predictors28-Sep -217.14817 189.88318 0.253
## predictors3-Oct 47.20811 189.88170 0.804
## predictors8-Oct -323.44129 189.87780 0.088
Estimate and plot ASV richness (with Breakaway):
psba<- subset_samples(ps, Type == "Meso" & Time !="t6" & Treat != "F")
ba <- breakaway(psba)
badf<- summary(ba) %>% add_column("SampleID" = psba %>% otu_table %>% sample_names)
badf<- merge(badf, metatable, by = "SampleID")
baPlot <- ggplot(badf, aes(x=Time, y=estimate, fill = Month)) + facet_grid(. ~ Month + Treat, scales="free" )+ geom_boxplot() + theme_bw() + theme(text = element_text(size=14)) +ylab("Richness Estimate") +xlab("") + scale_fill_manual(values=c("#004488", "#DDAA33"))+ theme(text = element_text(size=14)) + theme(legend.position="none")
baPlot
#ggsave("JuneOctMesosBreakaway.pdf", width = 8, height = 5)
Atchison Distance
(CLR + Euclidean)
Centered log-ratio normalization:
OTU4clr<- data.frame(t(data.frame(otu_table(ps))))
row.names(OTU4clr) <- gsub("\\.", "", row.names(OTU4clr))
OTUs.clr <- codaSeq.clr(OTU4clr + 0.5, samples.by.row=TRUE)
OTU2 <- otu_table(as.matrix(OTUs.clr), taxa_are_rows = FALSE)
metatable2<-metatable
row.names(metatable2) <- gsub("-", "", row.names(metatable2))
META2<- sample_data(metatable2)
psCLR <- phyloseq(OTU2,TAX,META2)
PERMANOVA by month for all field samples:
psCLRfield <- subset_samples(psCLR, Type == "Field")
OTUField <- data.frame(otu_table(psCLRfield))
metaF <- metatable2[row.names(metatable2) %in% row.names(OTUField),]
adonis2(vegdist(OTUField , method = "euclidean") ~ Month, data = metaF)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = vegdist(OTUField, method = "euclidean") ~ Month, data = metaF)
## Df SumOfSqs R2 F Pr(>F)
## Month 1 17630 0.17129 5.3741 0.001 ***
## Residual 26 85294 0.82871
## Total 27 102924 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
distmat<- vegdist(OTUField, method = "euclidean")
PCfield <- princomp(distmat)
library("factoextra")
fviz_eig(PCfield, geom = "bar", bar_width = 0.4) + ggtitle("")
metaF$id<- row.names(metaF)
metaF <- metaF[match(names(PCfield$scale), metaF$id),]
ggbiplot(PCfield, var.axes = FALSE, obs.scale = 1, groups = metaF$Time) +theme_bw() +scale_color_manual(values=c("#CC223B","#004488", "#CC223B", "#88CCEE", "#004488","#E6D02E" ,"#88CCEE"), name = "") + theme(text = element_text(size=18)) + theme(legend.position="none") +ggtitle("") + theme(legend.title = element_blank()) +geom_point(aes(color=metaF$Time, shape = metaF$Month, size = 4)) + scale_size_identity()
June and October PCAs:
psCLRfieldjune <- subset_samples(psCLRfield, Month == "June")
OTUsJuneField <- data.frame(otu_table(psCLRfieldjune))
metaJ <- metatable2[row.names(metatable2) %in% row.names(OTUsJuneField),]
set.seed(1)
distmat<- vegdist(OTUsJuneField, method = "euclidean")
PCjune <- princomp(distmat)
fviz_eig(PCjune, geom = "bar", bar_width = 0.4) + ggtitle("")
metaJ$id<- row.names(metaJ)
metaJ <- metaJ[match(names(PCjune$scale), metaJ$id),]
ggbiplot(PCjune, var.axes = FALSE, obs.scale = 1, groups = metaJ$Time) +theme_bw() +scale_color_manual(values=c("#004488", "#CC223B", "#88CCEE"), name = "") + theme(text = element_text(size=18)) + theme(legend.position="bottom") +ggtitle("A. June") + theme(legend.title = element_blank()) +geom_point(aes(color=metaJ$Time, size = 4)) + scale_size_identity()
psCLRfieldOct <- subset_samples(psCLR, Month == "Oct" & Type == "Field")
OTUsOctField <- data.frame(otu_table(psCLRfieldOct))
metaO <- metatable2[row.names(metatable2) %in% row.names(OTUsOctField),]
set.seed(1)
distmatO<- vegdist(OTUsOctField, method = "euclidean")
PCoct <- princomp(distmatO)
fviz_eig(PCoct, geom = "bar", bar_width = 0.4) + ggtitle("")
metaO$id<- row.names(metaO)
metaO <- metaO[match(names(PCoct$scale), metaO$id),]
ggbiplot(PCoct, var.axes = FALSE, obs.scale = 1, groups = metaO$Time) +theme_bw() +scale_color_manual(values=c("#004488", "#CC223B","#E6D02E" ,"#88CCEE"), limits=c("28-Sep", "1-Oct", "3-Oct", "8-Oct"), name = "") + theme(text = element_text(size=18)) + theme(legend.position="bottom") +ggtitle("B. October") + theme(legend.title = element_blank()) +geom_point(aes(color=metaO$Time, size = 4)) + scale_size_identity()
PERMANOVA by sampling date in June:
metatable_new <- metatable
row.names(metatable_new) <- gsub("-", "", row.names(metatable))
row.names(metatable_new) <- gsub("_", "", row.names(metatable_new))
OTUsJuneField <- data.frame(otu_table(psCLRfieldjune))
meta <- metatable2[row.names(metatable2) %in% row.names(OTUsJuneField),]
meta$BDA <- factor(meta$BDA, levels = c("B", "D", "A"))
set.seed(1)
adonis(vegdist(OTUsJuneField , method = "euclidean") ~ BDA, data = meta)
##
## Call:
## adonis(formula = vegdist(OTUsJuneField, method = "euclidean") ~ BDA, data = meta)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## BDA 2 7344 3672.2 0.89776 0.16632 0.671
## Residuals 9 36814 4090.5 0.83368
## Total 11 44159 1.00000
PERMANOVA by sampling date in October:
OTUsOctField <- data.frame(otu_table(psCLRfieldOct))
meta <- metatable[row.names(metatable) %in% row.names(OTUsOctField),]
meta$BDA <- factor(meta$BDA, levels = c("B", "D", "A"))
set.seed(1)
adonis(vegdist(OTUsOctField, method = "euclidean") ~ BDA, data = meta)
##
## Call:
## adonis(formula = vegdist(OTUsOctField, method = "euclidean") ~ BDA, data = meta)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## BDA 2 3779.4 1889.7 0.93109 0.1253 0.621
## Residuals 13 26384.1 2029.5 0.8747
## Total 15 30163.4 1.0000
June
psCLRmesoJ <- subset_samples(psCLR, Type == "Meso" & Time != "t6" & Month =="June" & Treat != "F")
OTUsMJ <- data.frame(otu_table(psCLRmesoJ))
metaMJ <- metatable2[row.names(metatable2) %in% row.names(OTUsMJ),]
distmat<- vegdist(OTUsMJ , method = "euclidean")
PCmj <- princomp(distmat)
fviz_eig(PCmj, geom = "bar", bar_width = 0.4) + ggtitle("")
metaMJ$id<- row.names(metaMJ)
metaMJ <- metaMJ[match(names(PCmj$scale), metaMJ$id),]
metaMJ$Time <- as.character(metaMJ$Time)
ggbiplot(PCmj, var.axes = FALSE, obs.scale = 1, groups = metaMJ$Time) +theme_bw() +scale_color_jcolors(palette="pal7", name = "Time") + theme(text = element_text(size=18)) + theme(legend.position="none") +ggtitle("A. June") + theme(legend.title = element_blank()) +geom_point(aes(color=metaMJ$Time, shape = metaMJ$Treat, size = 4)) + scale_size_identity()
October
psCLRmesoO <- subset_samples(psCLR, Type == "Meso" & Time != "t6" & Month =="Oct" &Treat != "F")
OTUsMO <- data.frame(otu_table(psCLRmesoO))
metaMO <- metatable2[row.names(metatable2) %in% row.names(OTUsMO),]
distmat<- vegdist(OTUsMO , method = "euclidean")
PCmo <- princomp(distmat)
fviz_eig(PCmo, geom = "bar", bar_width = 0.4) + ggtitle("")
metaMO$id<- row.names(metaMO)
metaMO <- metaMO[match(names(PCmo$scale), metaMO$id),]
metaMO$Time <- as.character(metaMO$Time)
ggbiplot(PCmo, var.axes = FALSE, obs.scale = 1, groups = metaMO$Time) +theme_bw() +scale_color_jcolors(palette="pal7", name = "Time") + theme(text = element_text(size=18)) + theme(legend.position="bottom") +ggtitle("B. October") + theme(legend.title = element_blank()) +geom_point(aes(color=metaMO$Time, shape = metaMO$Treat, size = 4)) + scale_size_identity()
Load metadata:
metatable <- read.csv("JuneOctSampleMap.csv", header = TRUE)
row.names(metatable) <- metatable[["SampleID"]]
detach("package:dplyr", unload=TRUE)
library("dplyr")
metatable <- metatable %>% select(SampleID, everything())
#convert to phyloseq object
META<- sample_data(metatable)
Load taxonomy:
taxonomy <- read.csv("JuneOctTaxonomy.csv", stringsAsFactors = FALSE)
names(taxonomy) <- c("row", "tax", "Confidence")
row.names(taxonomy) <-taxonomy[[1]]
taxonomy <- taxonomy[,(-1)]
#SILVA taxonomy is in one column, separate to be able to work with different taxonomic levels:
taxonomy <- separate(taxonomy, tax, c("D0","D1", "D2", "D3", "D4", "D5", "D6", "D7", "D8", "D9", "D10", "D11", "D12", "D13", "D14"), sep = ";", fill = "right")
#Keep the first 8 taxonomic levels (no assignments afetr that)
taxonomy <- taxonomy[,c(1:8)]
taxmat <- as.matrix(taxonomy)
#covert taxonomy table to phyloseq object
TAX = tax_table(taxmat)
create phyloseq object including feature table, taxonomy, and metadata:
ps = merge_phyloseq(phyloseq, TAX, META)
subset to just field samples:
psF <- subset_samples(ps, Treat == "A" )
for June 13th (before) to June 16th (during)
psFJune12 <- subset_samples(psF, Time == "13-Jun" | Time=="16-Jun" )
OTUspsfieldjune <- data.frame(otu_table(psFJune12))
names(OTUspsfieldjune) <- gsub("\\.", "", names(OTUspsfieldjune))
metatableJ12<- metatable[metatable$Time == "13-Jun" | metatable$Time=="16-Jun",]
row.names(metatableJ12)<- gsub("\\-", "", metatableJ12$SampleID)
metatableJ12 <- metatableJ12[,c("TreatRep", "BDA")]
names(metatableJ12) <- c("Replicate", "condition")
metatableJ12$name <- row.names(metatableJ12)
target <- names(OTUspsfieldjune)
metatableJ12<-metatableJ12[match(target, metatableJ12$name),]
set.seed(50)
sampleTable<- metatableJ12
sampleTable$condition <- factor(sampleTable$condition, levels = c("D", "B"))
ddseJ12 <- DESeqDataSetFromMatrix(countData = OTUspsfieldjune, colData = sampleTable, design = ~ condition)
## converting counts to integer mode
ddseJ12$condition <- relevel(ddseJ12$condition, ref = "B")
ddse2JBD <- DESeq(ddseJ12, test="Wald", fitType="parametric")
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
resJ12<- results(ddse2JBD, alpha = 0.05, cooksCutoff = FALSE )
summary(resJ12)
##
## out of 8130 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 267, 3.3%
## LFC < 0 (down) : 22, 0.27%
## outliers [1] : 0, 0%
## low counts [2] : 6329, 78%
## (mean count < 6)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
sigtabJ12wSoil = resJ12[which(resJ12$padj < 0.05), ]
dim(sigtabJ12wSoil)
## [1] 289 6
add taxonomy to sig tab
sigtabJ12wSoil = cbind(as(sigtabJ12wSoil, "data.frame"), as(tax_table(psF)[rownames(sigtabJ12wSoil), ], "matrix"))
Fold Change Figure for Significant SVs:
sigplot<- ggplot(sigtabJ12wSoil, aes(x=D1, y=log2FoldChange, color = D1)) + geom_jitter(size=1.5, alpha = 0.7) + theme(legend.title=element_blank()) + theme_bw() + ggtitle("June 16 v. June 13 - Significantly different ASVs") + theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))
sigplot + theme(legend.position="none")
Fold Change figure for all results (not just those that are significant)
results <- as.data.frame(resJ12)
results = cbind(as(results, "data.frame"), as(tax_table(psF)[rownames(results), ], "matrix"))
resplot<- ggplot(results, aes(x=D1, y=log2FoldChange, color = D1)) + geom_jitter(size=1.5, alpha = 0.7) + theme(legend.title=element_blank()) + theme_bw() + ggtitle("June 16 v. June 13 ") + theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))
resplot + theme(legend.position="none")
## Warning: Removed 27877 rows containing missing values (geom_point).
This plot shows that there is not actually clustering of logFC around 10 and 20, that only appears to be the when fold changes that are not significant are excluded from the plot (with logFC 0-|~10|).
psFJune13 <- subset_samples(psF, Time == "13-Jun" | Time=="19-Jun" )
OTUspsfieldjune13 <- data.frame(otu_table(psFJune13))
names(OTUspsfieldjune13) <- gsub("\\.", "", names(OTUspsfieldjune13))
aldex.in <- OTUspsfieldjune13[rowSums(OTUspsfieldjune13)>=0,]
metatableJ13<- metatable[metatable$Time == "13-Jun" | metatable$Time=="19-Jun",]
row.names(metatableJ13)<- gsub("\\-", "", metatableJ13$SampleID)
metatableJ13 <- metatableJ13[,c("TreatRep", "BDA")]
names(metatableJ13) <- c("Replicate", "condition")
metatableJ13$name <- row.names(metatableJ13)
target <- names(OTUspsfieldjune13)
metatableJ13<-metatableJ13[match(target, metatableJ13$name),]
sampleTable<- metatableJ13
ddseJ13 <- DESeqDataSetFromMatrix(countData = aldex.in, colData = sampleTable, design = ~ condition)
## converting counts to integer mode
## factor levels were dropped which had no samples
ddseJ13$condition <- relevel(ddseJ13$condition, ref = "B")
ddse2J13 <- DESeq(ddseJ13, test="Wald", fitType="parametric" )
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
resJ13<- results(ddse2J13,alpha = 0.05, cooksCutoff = FALSE)
summary(resJ13)
##
## out of 3089 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 10, 0.32%
## LFC < 0 (down) : 9, 0.29%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
sigtabJ13wSoil = resJ13[which(resJ13$padj < 0.05), ]
dim(sigtabJ13wSoil)
## [1] 19 6
add taxonomy to sig tab
sigtabJ13wSoil = cbind(as(sigtabJ13wSoil, "data.frame"), as(tax_table(psF)[rownames(sigtabJ13wSoil), ], "matrix"))
Fold Change Figure for Significant SVs:
sigplot<- ggplot(sigtabJ13wSoil, aes(x=D1, y=log2FoldChange, color = D1)) + geom_jitter(size=1.5, alpha = 0.7) + theme(legend.title=element_blank()) + theme_bw() +
ggtitle(" ") +
theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))
sigplot + theme(legend.position="none")
psFJune23 <- subset_samples(psF, Time == "16-Jun" | Time=="19-Jun" )
OTUspsfieldjune23 <- data.frame(otu_table(psFJune23))
names(OTUspsfieldjune23) <- gsub("\\.", "", names(OTUspsfieldjune23))
metatableJ23<- metatable[metatable$Time == "16-Jun" | metatable$Time=="19-Jun",]
row.names(metatableJ23)<- gsub("\\-", "", metatableJ23$SampleID)
metatableJ23 <- metatableJ23[,c("TreatRep", "BDA")]
names(metatableJ23) <- c("Replicate", "condition")
metatableJ23$name <- row.names(metatableJ23)
target <- names(OTUspsfieldjune23)
metatableJ23<-metatableJ23[match(target, metatableJ23$name),]
sampleTable<- metatableJ23
aldex.in <- OTUspsfieldjune23[rowSums(OTUspsfieldjune23)>=0,]
ddseJ23 <- DESeqDataSetFromMatrix(countData = aldex.in, colData = sampleTable, design = ~ condition)
## converting counts to integer mode
## factor levels were dropped which had no samples
ddse2J23 <- DESeq(ddseJ23, test="Wald", fitType="parametric" )
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
resJ23<- results(ddse2J23,alpha = 0.05, cooksCutoff = FALSE, contrast = c("condition", "A", "D"))
summary(resJ23)
##
## out of 8773 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 22, 0.25%
## LFC < 0 (down) : 124, 1.4%
## outliers [1] : 0, 0%
## low counts [2] : 6950, 79%
## (mean count < 5)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
Total
OTUsoilJ <- data.frame(otu_table(subset_samples(ps, Treat == "RS" & Month =="June")))
OTUsoilJ <-OTUsoilJ[rowSums(OTUsoilJ )>0,]
#4,929 ASV in SOIL samples
sigtabJ12wSoil$Soil <- ifelse((row.names(sigtabJ12wSoil)) %in% row.names(OTUsoilJ), "Yes", "No")
sum(sigtabJ12wSoil$Soil=="Yes")
## [1] 38
over abundant?
sum(sigtabJ12wSoil[sigtabJ12wSoil$log2FoldChange>0,]$Soil=="Yes")
## [1] 37
sigtabJ12wSoil$D1 <- substring(sigtabJ12wSoil$D1, 6)
sigtabJ12wSoil$D2 <- substring(sigtabJ12wSoil$D2, 6)
sigtabJ12wSoil$D3 <- substring(sigtabJ12wSoil$D3, 6)
sigtabJ12wSoil$ASV_ID <- row.names(sigtabJ12wSoil)
sigtabJ12wSoil_O <- sigtabJ12wSoil[order(sigtabJ12wSoil$D1),]
sigtabJ12wSoil_O <- sigtabJ12wSoil_O %>% filter( D3 != "Chloroplast" & D3 != "uncultured bacterium" & D3 != "uncultured" & D3 != "Subgroup 2" & D3 != "") %>% filter(!is.na(D3))
uniqueD3 <- unique(sigtabJ12wSoil_O$D3)
sigtabJ12wSoil_O$D3 <- factor(sigtabJ12wSoil_O$D3, levels = uniqueD3)
sigplotJune12<- ggplot(sigtabJ12wSoil_O, aes(x=D3, y=log2FoldChange, color = D1, shape = Soil)) + geom_jitter(size=1, alpha = 0.8) + theme(legend.title=element_blank()) + theme_bw() + theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5)) +ggtitle("A. June 16 v. June 13") + theme(text = element_text(size=12)) + xlab("")
sigplotJune12 + theme(legend.position="none")
legend <- g_legend(sigplotJune12)
SVlegend <- grid.arrange(legend)
sigplotJune12<- ggplot(sigtabJ12wSoil_O, aes(x=D3, y=log2FoldChange, color = Soil)) + geom_jitter(size=1, alpha = 0.8) + theme(legend.title=element_blank()) + theme_bw() + scale_color_manual(values= c("#628395", "#FC471E" ) )+ theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5)) +ggtitle("A. June 16 v. June 13") + theme(text = element_text(size=12)) + xlab("")
sigplotJune12 + theme(legend.position="none")
#ggsave("June16June13_retest_foldchange.eps", width=9, height =5)
total:
sigtabJ13wSoil$Soil <- ifelse((row.names(sigtabJ13wSoil)) %in% row.names(OTUsoilJ), "Yes", "No")
sigtabJ13wSoil$D1 <- substring(sigtabJ13wSoil$D1, 6)
sigtabJ13wSoil$D2 <- substring(sigtabJ13wSoil$D2, 6)
sigtabJ13wSoil$D3 <- substring(sigtabJ13wSoil$D3, 6)
over abundant?
sum(sigtabJ13wSoil[sigtabJ13wSoil$log2FoldChange>0,]$Soil=="Yes")
## [1] 0
under?
sum(sigtabJ13wSoil[sigtabJ13wSoil$log2FoldChange<0,]$Soil=="Yes")
## [1] 1
sigtabJ13wSoil$ASV_ID <- row.names(sigtabJ13wSoil)
sigtabJ13wSoil_O <- sigtabJ13wSoil[order(sigtabJ13wSoil$D1),]
sigtabJ13wSoil_O <- sigtabJ13wSoil_O %>% filter( D3 != "Chloroplast" & D3 != "uncultured bacterium" & D3 != "uncultured") %>% filter(!is.na(D3))
uniqueD3 <- unique(sigtabJ13wSoil_O$D3)
sigtabJ13wSoil_O$D3 <- factor(sigtabJ13wSoil_O$D3, levels = uniqueD3)
sigplotJune13<- ggplot(sigtabJ13wSoil_O, aes(x=D3, y=log2FoldChange, color = Soil)) + geom_jitter(size=1, alpha = 0.8) + theme(legend.title=element_blank()) + theme_bw() + scale_color_manual(values= c("#628395", "#FC471E" ) )+ theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5)) +ggtitle("B. June 19 v. June 13") + theme(text = element_text(size=12)) + xlab("")
sigplotJune13 + theme(legend.position="none")
#ggsave("June19June13_retest_foldchange.pdf", width=1.5, height =5)
FACET
sigtabJ12wSoil_O$test <- "DB"
sigtabJ13wSoil_O$test <- "AB"
sigtabJfacet<- rbind(sigtabJ12wSoil_O, sigtabJ13wSoil_O)
#change sign of fold change so plot works when I rotate it 90 degrees in illustrator
sigtabJfacet$log2FoldChange <- sigtabJfacet$log2FoldChange * -1
facetsigplot<- ggplot(sigtabJfacet, aes(x=D3, y=log2FoldChange, color = Soil)) + geom_jitter(size=1, alpha = 0.8) + theme(legend.title=element_blank()) + theme_bw() + scale_color_manual(values= c("#628395", "#FC471E" ) )+ theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))+ theme(text = element_text(size=12)) + xlab("") + facet_grid(~test, scales="free_x", space = "free")
facetsigplot + theme(legend.position="none")
#ggsave("June_Facet_foldchange.pdf", width=9, height =5)
total:
sigtabJ23wSoil = resJ23[which(resJ23$padj < 0.05), ]
sigtabJ23wSoil$Soil <- ifelse((row.names(sigtabJ23wSoil)) %in% row.names(OTUsoilJ), "Yes", "No")
sum(sigtabJ23wSoil$Soil=="Yes")
## [1] 23
over abundant?
sum(sigtabJ23wSoil[sigtabJ23wSoil$log2FoldChange>0,]$Soil=="Yes")
## [1] 0
under?
sum(sigtabJ23wSoil[sigtabJ23wSoil$log2FoldChange<0,]$Soil=="Yes")
## [1] 23
psFoct12 <- subset_samples(psF, Time == "28-Sep" | Time=="1-Oct" | Time =="3-Oct")
OTUspsfieldOCT <- data.frame(otu_table(psFoct12))
aldex.in <-OTUspsfieldOCT[rowSums(OTUspsfieldOCT)>=0,]
metatableO12<- metatable[metatable$Time == "28-Sep" | metatable$Time=="1-Oct" | metatable$Time=="3-Oct",]
metatableO12 <- metatableO12[,c("TreatRep", "BDA")]
names(metatableO12) <- c("Replicate", "condition")
metatableO12$name <- row.names(metatableO12)
target <- names(OTUspsfieldOCT)
metatableO12<-metatableO12[match(target, metatableO12$name),]
sampleTable<- metatableO12
ddseO12 <- DESeqDataSetFromMatrix(countData = aldex.in, colData = sampleTable, design = ~ condition)
## converting counts to integer mode
## factor levels were dropped which had no samples
ddse2O12 <- DESeq(ddseO12, test="Wald", fitType="parametric" )
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 875 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
resO12<- results(ddse2O12,alpha = 0.05, cooksCutoff = FALSE, contrast = c("condition", "D", "B"))
summary(resO12)
##
## out of 3709 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 93, 2.5%
## LFC < 0 (down) : 14, 0.38%
## outliers [1] : 0, 0%
## low counts [2] : 2430, 66%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
sigtabO12wSoil = resO12[which(resO12$padj < 0.05), ]
sigtabO12wSoil = cbind(as(sigtabO12wSoil, "data.frame"), as(tax_table(psF)[rownames(sigtabO12wSoil), ], "matrix"))
sigplot<- ggplot(sigtabO12wSoil, aes(x=D1, y=log2FoldChange, color = D1)) + geom_jitter(size=1.5, alpha = 0.7) + theme(legend.title=element_blank()) + theme_bw() + ggtitle("Oct. 1 v. Sept. 28 ") + theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))
sigplot + theme(legend.position="none")
psFoct13 <- subset_samples(psF, Time == "28-Sep" | Time=="8-Oct")
OTUspsfieldOCT <- data.frame(otu_table(psFoct13))
aldex.in <-OTUspsfieldOCT[rowSums(OTUspsfieldOCT)>=0,]
metatableO13<- metatable[metatable$Time == "28-Sep" | metatable$Time=="8-Oct",]
metatableO13 <- metatableO13[,c("TreatRep", "BDA")]
names(metatableO13) <- c("Replicate", "condition")
metatableO13$name <- row.names(metatableO13)
target <- names(OTUspsfieldOCT)
metatableO13<-metatableO13[match(target, metatableO13$name),]
sampleTable<- metatableO13
ddseO13 <- DESeqDataSetFromMatrix(countData = aldex.in, colData = sampleTable, design = ~ condition)
## converting counts to integer mode
## factor levels were dropped which had no samples
ddse2O13 <- DESeq(ddseO13, test="Wald", fitType="parametric" )
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
resO13<- results(ddse2O13,alpha = 0.05, cooksCutoff = FALSE, contrast = c("condition", "A", "B"))
summary(resO13)
##
## out of 2293 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 35, 1.5%
## LFC < 0 (down) : 11, 0.48%
## outliers [1] : 0, 0%
## low counts [2] : 389, 17%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
psFoct23 <- subset_samples(psF, Time == "8-Oct" | Time=="1-Oct" | Time =="3-Oct")
OTUspsfieldOCT <- data.frame(otu_table(psFoct23))
aldex.in <-OTUspsfieldOCT[rowSums(OTUspsfieldOCT)>=0,]
metatableO23<- metatable[metatable$Time == "8-Oct" | metatable$Time=="1-Oct" | metatable$Time=="3-Oct",]
metatableO23 <- metatableO23[,c("TreatRep", "BDA")]
names(metatableO23) <- c("Replicate", "condition")
metatableO23$name <- row.names(metatableO23)
target <- names(OTUspsfieldOCT)
metatableO23<-metatableO23[match(target, metatableO23$name),]
sampleTable<- metatableO23
ddseO23 <- DESeqDataSetFromMatrix(countData = aldex.in, colData = sampleTable, design = ~ condition)
## converting counts to integer mode
## factor levels were dropped which had no samples
ddse2O23 <- DESeq(ddseO23, test="Wald", fitType="parametric" )
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 787 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
resO23<- results(ddse2O23,alpha = 0.05, cooksCutoff = FALSE, contrast = c("condition", "A", "D"))
summary(resO23)
##
## out of 3227 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 12, 0.37%
## LFC < 0 (down) : 29, 0.9%
## outliers [1] : 0, 0%
## low counts [2] : 1720, 53%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
total:
OTUsoilO <- data.frame(otu_table(subset_samples(ps, Treat == "RS" & Month =="Oct")))
OTUsoilO <-OTUsoilO[rowSums(OTUsoilO )>0,]
#2,316 ASV in SOIL samples
sigtabO12wSoil$Soil <- ifelse((row.names(sigtabO12wSoil)) %in% row.names(OTUsoilO), "Yes", "No")
sum(sigtabO12wSoil$Soil=="Yes")
## [1] 0
plot:
sigtabO12wSoil$D3 <- substring(sigtabO12wSoil$D3, 6)
sigtabO12wSoil$ASV_ID <- row.names(sigtabO12wSoil)
sigtabO12wSoil_O <- sigtabO12wSoil[order(sigtabO12wSoil$D1),]
sigtabO12wSoil_O <- sigtabO12wSoil_O %>% filter( D3 != "Chloroplast" & D3 != "uncultured bacterium" & D3 != "uncultured") %>% filter(!is.na(D3))
uniqueD3 <- unique(sigtabO12wSoil_O$D3)
sigtabO12wSoil_O$D3 <- factor(sigtabO12wSoil_O$D3, levels = uniqueD3)
sigplotOct12<- ggplot(sigtabO12wSoil_O, aes(x=D3, y=log2FoldChange, color = D1, shape = Soil)) + geom_jitter(size=1, alpha = 0.8) + theme(legend.title=element_blank()) + theme_bw() + theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5)) +ggtitle("A. Oct 3/8 v. Sept. 28") + theme(text = element_text(size=12)) + xlab("")
sigplotOct12 + theme(legend.position="none")
legend <- g_legend(sigplotOct12)
SVlegend <- grid.arrange(legend)
sigplotOct12<- ggplot(sigtabO12wSoil_O, aes(x=D3, y=log2FoldChange, color = Soil)) + geom_jitter(size=1, alpha = 0.8) + theme(legend.title=element_blank()) + theme_bw() +
ggtitle(" ") +
theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))+ scale_color_manual(values= c("#628395", "#FC471E") ) +ggtitle("B. Oct 01 & Oct 03 v. Sept 28 ") +xlab("")
sigplotOct12
total:
OTUsoilO <- data.frame(otu_table(subset_samples(ps, Treat == "RS" & Month =="Oct")))
OTUsoilO <-OTUsoilO[rowSums(OTUsoilO )>0,]
#2,316 ASV in SOIL samples
sigtabO13wSoil = data.frame(resO13[which(resO13$padj < 0.05), ])
sigtabO13wSoil$Soil <- ifelse((row.names(sigtabO13wSoil)) %in% row.names(OTUsoilO), "Yes", "No")
sum(sigtabO13wSoil$Soil=="Yes")
## [1] 0
plot
sigtabO13wSoil = cbind(as(sigtabO13wSoil, "data.frame"), as(tax_table(psF)[rownames(sigtabO13wSoil), ], "matrix"))
sigtabO13wSoil$D3 <- substring(sigtabO13wSoil$D3, 6)
sigtabO13wSoil$ASV_ID <- row.names(sigtabO13wSoil)
sigtabO13wSoil_O <- sigtabO13wSoil[order(sigtabO13wSoil$D1),]
sigtabO13wSoil_O <- sigtabO13wSoil_O %>% filter( D3 != "Chloroplast" & D3 != "uncultured bacterium" & D3 != "uncultured") %>% filter(!is.na(D3))
uniqueD3 <- unique(sigtabO13wSoil_O$D3)
sigtabO13wSoil_O$D3 <- factor(sigtabO13wSoil_O$D3, levels = uniqueD3)
sigplotOct13<- ggplot(sigtabO13wSoil_O, aes(x=D3, y=log2FoldChange, color = Soil)) + geom_jitter(size=1, alpha = 0.8) + theme(legend.title=element_blank()) + theme_bw() +
ggtitle(" ") +
theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))+ scale_color_manual(values= c("#628395", "#FC471E") ) +ggtitle("B. Oct 08 v. Sept 28 ") +xlab("")
sigplotOct13
FACET
sigtabO12wSoil_O$test <- "DB"
sigtabO13wSoil_O$test <- "AB"
sigtabOfacet<- rbind(sigtabO12wSoil_O, sigtabO13wSoil_O)
sigtabOfacet$log2FoldChange <- sigtabOfacet$log2FoldChange* -1
Ofacetsigplot<- ggplot(sigtabOfacet, aes(x=D3, y=log2FoldChange, color = Soil)) + geom_jitter(size=1, alpha = 0.8) + theme(legend.title=element_blank()) + theme_bw() + scale_color_manual(values= c("#628395", "#FC471E" ) )+ theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))+ theme(text = element_text(size=12)) + xlab("") + facet_grid(~test, scales="free_x", space = "free")
Ofacetsigplot + theme(legend.position="none")
ggsave("Oct_Facet_Foldchange.pdf", width = 5, height = 4)
total:
sigtabO23wSoil = resO23[which(resO23$padj < 0.05), ]
sigtabO23wSoil$Soil <- ifelse((row.names(sigtabO23wSoil)) %in% row.names(OTUsoilO), "Yes", "No")
sum(sigtabO23wSoil$Soil=="Yes")
## [1] 0
deseqRes<- read.csv("DESeq_results_revised.csv")
deseqRes
## Month Comp Up UpSoil Down DownSoil
## 1 June BD 230 37 -21 -1
## 2 June BA 10 0 -8 -1
## 3 June DA 22 0 -101 -23
## 4 October BD 93 0 -14 0
## 5 October BA 35 0 -11 0
## 6 October DA 12 0 -29 0
deseqRes_melt <- melt(deseqRes, by = Comp)
## Using Month, Comp as id variables
deseqRes_melt$Comp <- factor(deseqRes_melt$Comp, levels = c("BA", "DA", "BD"))
dat1 <- subset(deseqRes_melt,value >= 0)
dat2 <- subset(deseqRes_melt,value < 0)
ggplot() +
geom_bar(data = dat1, aes(x=Comp, y=value, fill=variable),stat = "identity") +
geom_bar(data = dat2, aes(x=Comp, y=value, fill=variable),stat = "identity") +
scale_fill_brewer(type = "seq", palette = 1) + facet_wrap(~Month, ncol =1, strip.position = c( "right")) + coord_flip() + scale_fill_manual(values= c("#628395", "#FC471E", "#628395", "#FC471E")) + geom_hline(yintercept=0, color= "black") +
ylim(-275, 275) + theme(legend.position="none") + xlab("Pairwise comparisons") + ylab("Number of differentially abundant ASVs") + theme(text = element_text(size=18))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
#ggsave("numberofASVsPlot.pdf", width = 6, height =5)
sessionInfo()
## R version 3.5.0 (2018-04-23)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS 10.14.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid stats4 parallel stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] factoextra_1.0.7 ggpubr_0.2.4
## [3] magrittr_1.5 forcats_0.4.0
## [5] stringr_1.4.0 purrr_0.3.3
## [7] readr_1.3.1 tibble_3.0.1
## [9] tidyverse_1.3.0 intrval_0.1-1
## [11] ggbiplot_0.55 scales_1.1.0
## [13] plyr_1.8.5 CoDaSeq_0.99.2
## [15] car_3.0-5 carData_3.0-3
## [17] zCompositions_1.3.3-1 truncnorm_1.0-8
## [19] NADA_1.6-1 survival_3.1-8
## [21] MASS_7.3-51.5 ALDEx2_1.14.1
## [23] breakaway_4.6.11 dplyr_0.8.3
## [25] jcolors_0.0.4 metagMisc_0.0.4
## [27] vegan_2.5-6 lattice_0.20-38
## [29] permute_0.9-5 gridExtra_2.3
## [31] DESeq2_1.22.2 SummarizedExperiment_1.12.0
## [33] DelayedArray_0.8.0 BiocParallel_1.16.6
## [35] matrixStats_0.55.0 Biobase_2.42.0
## [37] GenomicRanges_1.34.0 GenomeInfoDb_1.18.2
## [39] IRanges_2.16.0 S4Vectors_0.20.1
## [41] BiocGenerics_0.28.0 qiime2R_0.99.1
## [43] reshape2_1.4.3 RColorBrewer_1.1-2
## [45] tidyr_1.0.0 ggplot2_3.3.0
## [47] phyloseq_1.26.1
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.1.5 Hmisc_4.3-0
## [4] igraph_1.2.4.2 splines_3.5.0 digest_0.6.23
## [7] foreach_1.4.7 htmltools_0.4.0 fansi_0.4.0
## [10] checkmate_1.9.4 memoise_1.1.0 cluster_2.1.0
## [13] openxlsx_4.1.4 Biostrings_2.50.2 annotate_1.60.1
## [16] modelr_0.1.5 colorspace_1.4-1 ggrepel_0.8.1
## [19] blob_1.2.0 rvest_0.3.5 haven_2.2.0
## [22] xfun_0.11 crayon_1.3.4 RCurl_1.95-4.12
## [25] jsonlite_1.6 genefilter_1.64.0 iterators_1.0.12
## [28] ape_5.3 glue_1.3.1 gtable_0.3.0
## [31] zlibbioc_1.28.0 XVector_0.22.0 Rhdf5lib_1.4.3
## [34] abind_1.4-5 DBI_1.0.0 Rcpp_1.0.3
## [37] xtable_1.8-4 htmlTable_1.13.3 foreign_0.8-72
## [40] bit_1.1-14 Formula_1.2-3 htmlwidgets_1.5.1
## [43] httr_1.4.1 acepack_1.4.1 ellipsis_0.3.0
## [46] farver_2.0.1 pkgconfig_2.0.3 XML_3.98-1.20
## [49] nnet_7.3-12 dbplyr_1.4.2 locfit_1.5-9.1
## [52] labeling_0.3 tidyselect_0.2.5 rlang_0.4.5
## [55] AnnotationDbi_1.44.0 munsell_0.5.0 cellranger_1.1.0
## [58] tools_3.5.0 cli_2.0.0 generics_0.0.2
## [61] RSQLite_2.1.4 ade4_1.7-15 broom_0.5.6
## [64] evaluate_0.14 biomformat_1.10.1 yaml_2.2.0
## [67] knitr_1.26 bit64_0.9-7 fs_1.3.1
## [70] zip_2.0.4 nlme_3.1-140 xml2_1.2.2
## [73] compiler_3.5.0 rstudioapi_0.10 curl_4.3
## [76] ggsignif_0.6.0 reprex_0.3.0 geneplotter_1.60.0
## [79] stringi_1.4.3 Matrix_1.2-18 multtest_2.38.0
## [82] vctrs_0.2.4 pillar_1.4.3 lifecycle_0.2.0
## [85] data.table_1.12.8 bitops_1.0-6 R6_2.4.1
## [88] latticeExtra_0.6-28 rio_0.5.16 codetools_0.2-16
## [91] assertthat_0.2.1 rhdf5_2.26.2 withr_2.1.2
## [94] GenomeInfoDbData_1.2.0 mgcv_1.8-31 hms_0.5.2
## [97] rpart_4.1-15 rmarkdown_1.18 lubridate_1.7.4
## [100] base64enc_0.1-3